{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import keras\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from keras.layers import Input, Dense\n",
    "from keras.models import Model, Sequential"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the whitespace-delimited table. The separator regex is a raw string so the\n",
    "# backslash reaches pandas literally instead of relying on an invalid escape sequence.\n",
    "df = pd.read_csv(\"../housing-conditions-in-copenhagen/data.csv\", sep=r\"\\s+\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>housing</th>\n",
       "      <th>influence</th>\n",
       "      <th>contact_with_neighbours</th>\n",
       "      <th>satisfaction</th>\n",
       "      <th>n</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   housing  influence  contact_with_neighbours  satisfaction   n\n",
       "1        1          1                        1             1  21\n",
       "2        1          1                        1             2  21\n",
       "3        1          1                        1             3  28\n",
       "4        1          1                        2             1  14\n",
       "5        1          1                        2             2  19"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the raw table.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>housing</th>\n",
       "      <th>influence</th>\n",
       "      <th>contact_with_neighbours</th>\n",
       "      <th>satisfaction</th>\n",
       "      <th>n</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>72.00000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>72.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.50000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>23.347222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.12588</td>\n",
       "      <td>0.822226</td>\n",
       "      <td>0.503509</td>\n",
       "      <td>0.822226</td>\n",
       "      <td>17.666041</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.75000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>10.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.50000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>19.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.25000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>31.750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.00000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>86.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        housing  influence  contact_with_neighbours  satisfaction          n\n",
       "count  72.00000  72.000000                72.000000     72.000000  72.000000\n",
       "mean    2.50000   2.000000                 1.500000      2.000000  23.347222\n",
       "std     1.12588   0.822226                 0.503509      0.822226  17.666041\n",
       "min     1.00000   1.000000                 1.000000      1.000000   3.000000\n",
       "25%     1.75000   1.000000                 1.000000      1.000000  10.000000\n",
       "50%     2.50000   2.000000                 1.500000      2.000000  19.500000\n",
       "75%     3.25000   3.000000                 2.000000      3.000000  31.750000\n",
       "max     4.00000   3.000000                 2.000000      3.000000  86.000000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for every column of the raw table.\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode the three multi-level categorical columns in a single call.\n",
    "# `columns=` replaces each listed column with `<name>_<level>` indicator columns\n",
    "# (appended in the listed order) and leaves the other columns of `df` untouched.\n",
    "CATEGORICAL_COLUMNS = [\"housing\", \"influence\", \"satisfaction\"]\n",
    "df_new = pd.get_dummies(df, columns=CATEGORICAL_COLUMNS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Recode the target: 1 -> 0, any other value -> 1.\n",
    "# The values are read from the untouched `df` rather than from `df_new`, so the cell\n",
    "# is idempotent: re-running it cannot flip labels that an earlier run already recoded.\n",
    "df_new[\"contact_with_neighbours\"] = (df[\"contact_with_neighbours\"] != 1).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>contact_with_neighbours</th>\n",
       "      <th>n</th>\n",
       "      <th>housing_1</th>\n",
       "      <th>housing_2</th>\n",
       "      <th>housing_3</th>\n",
       "      <th>housing_4</th>\n",
       "      <th>influence_1</th>\n",
       "      <th>influence_2</th>\n",
       "      <th>influence_3</th>\n",
       "      <th>satisfaction_1</th>\n",
       "      <th>satisfaction_2</th>\n",
       "      <th>satisfaction_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>19</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   contact_with_neighbours   n  housing_1  housing_2  housing_3  housing_4  \\\n",
       "1                        0  21          1          0          0          0   \n",
       "2                        0  21          1          0          0          0   \n",
       "3                        0  28          1          0          0          0   \n",
       "4                        1  14          1          0          0          0   \n",
       "5                        1  19          1          0          0          0   \n",
       "\n",
       "   influence_1  influence_2  influence_3  satisfaction_1  satisfaction_2  \\\n",
       "1            1            0            0               1               0   \n",
       "2            1            0            0               0               1   \n",
       "3            1            0            0               0               0   \n",
       "4            1            0            0               1               0   \n",
       "5            1            0            0               0               1   \n",
       "\n",
       "   satisfaction_3  \n",
       "1               0  \n",
       "2               0  \n",
       "3               1  \n",
       "4               0  \n",
       "5               0  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the encoded frame.\n",
    "df_new.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Target: the `contact_with_neighbours` column as an (n_rows, 1) float array.\n",
    "y = df_new[\"contact_with_neighbours\"].values.reshape(-1, 1).astype(\"float32\")\n",
    "\n",
    "# Features: every remaining column. The explicit float32 cast guards against pandas\n",
    "# versions whose `get_dummies` returns bool columns; mixed with the integer `n`\n",
    "# column, `.values` would otherwise produce an object-dtype array.\n",
    "# NOTE(review): `n` looks like a per-row count and is fed to the model as a feature\n",
    "# here - confirm that is intended rather than using it as a sample weight.\n",
    "X = df_new.drop([\"contact_with_neighbours\"], axis=1).values.astype(\"float32\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((72, 1), (72, 11))"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shapes of the target and feature arrays.\n",
    "y.shape, X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Hold out a fixed fraction of the rows for validation.\n",
    "# - The row count comes from `X` instead of a hard-coded 72.\n",
    "# - A privately seeded RandomState makes the split reproducible under Restart & Run All\n",
    "#   and independent of the global NumPy RNG state.\n",
    "# - `X` and `y` are indexed, never overwritten, so re-running this cell is idempotent.\n",
    "VALIDATION_FRACTION = 0.2\n",
    "SPLIT_SEED = 42\n",
    "\n",
    "num_samples = X.shape[0]\n",
    "num_validation_samples = int(VALIDATION_FRACTION * num_samples)\n",
    "\n",
    "indices = np.random.RandomState(SPLIT_SEED).permutation(num_samples)\n",
    "\n",
    "# Slice at an explicit split point: a `[:-k]` slice would return an empty training set\n",
    "# when k == 0.\n",
    "split_point = num_samples - num_validation_samples\n",
    "train_indices = indices[:split_point]\n",
    "val_indices = indices[split_point:]\n",
    "\n",
    "X_train, y_train = X[train_indices], y[train_indices]\n",
    "X_val, y_val = X[val_indices], y[val_indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((58, 11), (58, 1), (14, 11), (14, 1))"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shapes of the training and validation splits.\n",
    "X_train.shape,y_train.shape,X_val.shape, y_val.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 72 samples, validate on 14 samples\n",
      "Epoch 1/300\n",
      "72/72 [==============================] - 0s - loss: 0.4841 - mean_squared_error: 0.4841 - val_loss: 0.5723 - val_mean_squared_error: 0.5723\n",
      "Epoch 2/300\n",
      "72/72 [==============================] - 0s - loss: 0.4754 - mean_squared_error: 0.4754 - val_loss: 0.5702 - val_mean_squared_error: 0.5702\n",
      "Epoch 3/300\n",
      "72/72 [==============================] - 0s - loss: 0.4644 - mean_squared_error: 0.4644 - val_loss: 0.5651 - val_mean_squared_error: 0.5651\n",
      "Epoch 4/300\n",
      "72/72 [==============================] - 0s - loss: 0.4527 - mean_squared_error: 0.4527 - val_loss: 0.5474 - val_mean_squared_error: 0.5474\n",
      "Epoch 5/300\n",
      "72/72 [==============================] - 0s - loss: 0.4281 - mean_squared_error: 0.4281 - val_loss: 0.5112 - val_mean_squared_error: 0.5112\n",
      "Epoch 6/300\n",
      "72/72 [==============================] - 0s - loss: 0.3883 - mean_squared_error: 0.3883 - val_loss: 0.4228 - val_mean_squared_error: 0.4228\n",
      "Epoch 7/300\n",
      "72/72 [==============================] - 0s - loss: 0.3187 - mean_squared_error: 0.3187 - val_loss: 0.2956 - val_mean_squared_error: 0.2956\n",
      "Epoch 8/300\n",
      "72/72 [==============================] - 0s - loss: 0.2710 - mean_squared_error: 0.2710 - val_loss: 0.2641 - val_mean_squared_error: 0.2641\n",
      "Epoch 9/300\n",
      "72/72 [==============================] - 0s - loss: 0.2717 - mean_squared_error: 0.2717 - val_loss: 0.2575 - val_mean_squared_error: 0.2575\n",
      "Epoch 10/300\n",
      "72/72 [==============================] - 0s - loss: 0.2657 - mean_squared_error: 0.2657 - val_loss: 0.2420 - val_mean_squared_error: 0.2420\n",
      "Epoch 11/300\n",
      "72/72 [==============================] - 0s - loss: 0.2757 - mean_squared_error: 0.2757 - val_loss: 0.2439 - val_mean_squared_error: 0.2439\n",
      "Epoch 12/300\n",
      "72/72 [==============================] - 0s - loss: 0.2635 - mean_squared_error: 0.2635 - val_loss: 0.2357 - val_mean_squared_error: 0.2357\n",
      "Epoch 13/300\n",
      "72/72 [==============================] - 0s - loss: 0.2719 - mean_squared_error: 0.2719 - val_loss: 0.2336 - val_mean_squared_error: 0.2336\n",
      "Epoch 14/300\n",
      "72/72 [==============================] - 0s - loss: 0.2692 - mean_squared_error: 0.2692 - val_loss: 0.2338 - val_mean_squared_error: 0.2338\n",
      "Epoch 15/300\n",
      "72/72 [==============================] - 0s - loss: 0.2605 - mean_squared_error: 0.2605 - val_loss: 0.2557 - val_mean_squared_error: 0.2557\n",
      "Epoch 16/300\n",
      "72/72 [==============================] - 0s - loss: 0.2586 - mean_squared_error: 0.2586 - val_loss: 0.2461 - val_mean_squared_error: 0.2461\n",
      "Epoch 17/300\n",
      "72/72 [==============================] - 0s - loss: 0.2508 - mean_squared_error: 0.2508 - val_loss: 0.2407 - val_mean_squared_error: 0.2407\n",
      "Epoch 18/300\n",
      "72/72 [==============================] - 0s - loss: 0.2496 - mean_squared_error: 0.2496 - val_loss: 0.2806 - val_mean_squared_error: 0.2806\n",
      "Epoch 19/300\n",
      "72/72 [==============================] - 0s - loss: 0.2535 - mean_squared_error: 0.2535 - val_loss: 0.2430 - val_mean_squared_error: 0.2430\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fd3e0748cc0>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A single sigmoid unit over the feature columns.\n",
    "model = Sequential()\n",
    "model.add(Dense(1,\n",
    "                activation='sigmoid',\n",
    "                input_dim=X_train.shape[1]))\n",
    "\n",
    "rmsprop = keras.optimizers.RMSprop(lr=0.01)\n",
    "model.compile(optimizer=rmsprop, loss='mse', metrics=['mse'])\n",
    "\n",
    "# Stop once `val_loss` has not improved for 5 consecutive epochs.\n",
    "early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')\n",
    "\n",
    "# Fit on the training split only. Fitting on the full `X`/`y` would put the validation\n",
    "# rows into the training data, so `val_loss` (and therefore early stopping) would no\n",
    "# longer measure held-out performance.\n",
    "model.fit(X_train,\n",
    "          y_train,\n",
    "          epochs=300,\n",
    "          validation_data=(X_val, y_val),\n",
    "          callbacks=[early_stopping])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Global TF Kernel (Python 3)",
   "language": "python",
   "name": "global-tf-python-3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
